# Study the Data Analyst job title and its salary estimation.
# importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the Data Analyst job postings into a DataFrame (the data set used here
# was downloaded from Kaggle).
# NOTE: the path below is an absolute Windows path; point it at your own copy
# of DataAnalyst.csv before running.
job_data = pd.read_csv(r"E:\Data scince\job\DataAnalyst.csv")
job_data
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | Type of ownership | Industry | Sector | Revenue | Competitors | Easy Apply | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Data Analyst, Center on Immigration and Justic... | $37K-$66K (Glassdoor est.) | Are you eager to roll up your sleeves and harn... | 3.2 | Vera Institute of Justice\n3.2 | New York, NY | New York, NY | 201 to 500 employees | 1961 | Nonprofit Organization | Social Assistance | Non-Profit | $100 to $500 million (USD) | -1 | True |
| 1 | 1 | Quality Data Analyst | $37K-$66K (Glassdoor est.) | Overview\n\nProvides analytical and technical ... | 3.8 | Visiting Nurse Service of New York\n3.8 | New York, NY | New York, NY | 10000+ employees | 1893 | Nonprofit Organization | Health Care Services & Hospitals | Health Care | $2 to $5 billion (USD) | -1 | -1 |
| 2 | 2 | Senior Data Analyst, Insights & Analytics Team... | $37K-$66K (Glassdoor est.) | We’re looking for a Senior Data Analyst who ha... | 3.4 | Squarespace\n3.4 | New York, NY | New York, NY | 1001 to 5000 employees | 2003 | Company - Private | Internet | Information Technology | Unknown / Non-Applicable | GoDaddy | -1 |
| 3 | 3 | Data Analyst | $37K-$66K (Glassdoor est.) | Requisition NumberRR-0001939\nRemote:Yes\nWe c... | 4.1 | Celerity\n4.1 | New York, NY | McLean, VA | 201 to 500 employees | 2002 | Subsidiary or Business Segment | IT Services | Information Technology | $50 to $100 million (USD) | -1 | -1 |
| 4 | 4 | Reporting Data Analyst | $37K-$66K (Glassdoor est.) | ABOUT FANDUEL GROUP\n\nFanDuel Group is a worl... | 3.9 | FanDuel\n3.9 | New York, NY | New York, NY | 501 to 1000 employees | 2009 | Company - Private | Sports & Recreation | Arts, Entertainment & Recreation | $100 to $500 million (USD) | DraftKings | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2248 | 2248 | RQS - IHHA - 201900004460 -1q Data Security An... | $78K-$104K (Glassdoor est.) | Maintains systems to protect data from unautho... | 2.5 | Avacend, Inc.\n2.5 | Denver, CO | Alpharetta, GA | 51 to 200 employees | -1 | Company - Private | Staffing & Outsourcing | Business Services | Unknown / Non-Applicable | -1 | -1 |
| 2249 | 2249 | Senior Data Analyst (Corporate Audit) | $78K-$104K (Glassdoor est.) | Position:\nSenior Data Analyst (Corporate Audi... | 2.9 | Arrow Electronics\n2.9 | Centennial, CO | Centennial, CO | 10000+ employees | 1935 | Company - Public | Wholesale | Business Services | $10+ billion (USD) | Avnet, Ingram Micro, Tech Data | -1 |
| 2250 | 2250 | Technical Business Analyst (SQL, Data analytic... | $78K-$104K (Glassdoor est.) | Title: Technical Business Analyst (SQL, Data a... | -1.0 | Spiceorb | Denver, CO | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
| 2251 | 2251 | Data Analyst 3, Customer Experience | $78K-$104K (Glassdoor est.) | Summary\n\nResponsible for working cross-funct... | 3.1 | Contingent Network Services\n3.1 | Centennial, CO | West Chester, OH | 201 to 500 employees | 1984 | Company - Private | Enterprise Software & Network Solutions | Information Technology | $25 to $50 million (USD) | -1 | -1 |
| 2252 | 2252 | Senior Quality Data Analyst | $78K-$104K (Glassdoor est.) | You.\n\nYou bring your body, mind, heart and s... | 3.4 | SCL Health\n3.4 | Broomfield, CO | Broomfield, CO | 10000+ employees | 1864 | Nonprofit Organization | Health Care Services & Hospitals | Health Care | $2 to $5 billion (USD) | Centura Health, HealthONE, Denver Health and H... | -1 |
2253 rows × 16 columns
# checking the type of data available in the given data set
job_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2253 entries, 0 to 2252 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 2253 non-null int64 1 Job Title 2253 non-null object 2 Salary Estimate 2253 non-null object 3 Job Description 2253 non-null object 4 Rating 2253 non-null float64 5 Company Name 2252 non-null object 6 Location 2253 non-null object 7 Headquarters 2253 non-null object 8 Size 2253 non-null object 9 Founded 2253 non-null int64 10 Type of ownership 2253 non-null object 11 Industry 2253 non-null object 12 Sector 2253 non-null object 13 Revenue 2253 non-null object 14 Competitors 2253 non-null object 15 Easy Apply 2253 non-null object dtypes: float64(1), int64(2), object(13) memory usage: 281.8+ KB
job_data.isnull().sum()
Unnamed: 0 0 Job Title 0 Salary Estimate 0 Job Description 0 Rating 0 Company Name 1 Location 0 Headquarters 0 Size 0 Founded 0 Type of ownership 0 Industry 0 Sector 0 Revenue 0 Competitors 0 Easy Apply 0 dtype: int64
job_data.head(5)
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | Type of ownership | Industry | Sector | Revenue | Competitors | Easy Apply | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Data Analyst, Center on Immigration and Justic... | $37K-$66K (Glassdoor est.) | Are you eager to roll up your sleeves and harn... | 3.2 | Vera Institute of Justice\n3.2 | New York, NY | New York, NY | 201 to 500 employees | 1961 | Nonprofit Organization | Social Assistance | Non-Profit | $100 to $500 million (USD) | -1 | True |
| 1 | 1 | Quality Data Analyst | $37K-$66K (Glassdoor est.) | Overview\n\nProvides analytical and technical ... | 3.8 | Visiting Nurse Service of New York\n3.8 | New York, NY | New York, NY | 10000+ employees | 1893 | Nonprofit Organization | Health Care Services & Hospitals | Health Care | $2 to $5 billion (USD) | -1 | -1 |
| 2 | 2 | Senior Data Analyst, Insights & Analytics Team... | $37K-$66K (Glassdoor est.) | We’re looking for a Senior Data Analyst who ha... | 3.4 | Squarespace\n3.4 | New York, NY | New York, NY | 1001 to 5000 employees | 2003 | Company - Private | Internet | Information Technology | Unknown / Non-Applicable | GoDaddy | -1 |
| 3 | 3 | Data Analyst | $37K-$66K (Glassdoor est.) | Requisition NumberRR-0001939\nRemote:Yes\nWe c... | 4.1 | Celerity\n4.1 | New York, NY | McLean, VA | 201 to 500 employees | 2002 | Subsidiary or Business Segment | IT Services | Information Technology | $50 to $100 million (USD) | -1 | -1 |
| 4 | 4 | Reporting Data Analyst | $37K-$66K (Glassdoor est.) | ABOUT FANDUEL GROUP\n\nFanDuel Group is a worl... | 3.9 | FanDuel\n3.9 | New York, NY | New York, NY | 501 to 1000 employees | 2009 | Company - Private | Sports & Recreation | Arts, Entertainment & Recreation | $100 to $500 million (USD) | DraftKings | True |
job_data.tail(5)
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | Type of ownership | Industry | Sector | Revenue | Competitors | Easy Apply | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2248 | 2248 | RQS - IHHA - 201900004460 -1q Data Security An... | $78K-$104K (Glassdoor est.) | Maintains systems to protect data from unautho... | 2.5 | Avacend, Inc.\n2.5 | Denver, CO | Alpharetta, GA | 51 to 200 employees | -1 | Company - Private | Staffing & Outsourcing | Business Services | Unknown / Non-Applicable | -1 | -1 |
| 2249 | 2249 | Senior Data Analyst (Corporate Audit) | $78K-$104K (Glassdoor est.) | Position:\nSenior Data Analyst (Corporate Audi... | 2.9 | Arrow Electronics\n2.9 | Centennial, CO | Centennial, CO | 10000+ employees | 1935 | Company - Public | Wholesale | Business Services | $10+ billion (USD) | Avnet, Ingram Micro, Tech Data | -1 |
| 2250 | 2250 | Technical Business Analyst (SQL, Data analytic... | $78K-$104K (Glassdoor est.) | Title: Technical Business Analyst (SQL, Data a... | -1.0 | Spiceorb | Denver, CO | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 |
| 2251 | 2251 | Data Analyst 3, Customer Experience | $78K-$104K (Glassdoor est.) | Summary\n\nResponsible for working cross-funct... | 3.1 | Contingent Network Services\n3.1 | Centennial, CO | West Chester, OH | 201 to 500 employees | 1984 | Company - Private | Enterprise Software & Network Solutions | Information Technology | $25 to $50 million (USD) | -1 | -1 |
| 2252 | 2252 | Senior Quality Data Analyst | $78K-$104K (Glassdoor est.) | You.\n\nYou bring your body, mind, heart and s... | 3.4 | SCL Health\n3.4 | Broomfield, CO | Broomfield, CO | 10000+ employees | 1864 | Nonprofit Organization | Health Care Services & Hospitals | Health Care | $2 to $5 billion (USD) | Centura Health, HealthONE, Denver Health and H... | -1 |
# Some columns use -1 (stored as text or as a number) to signal missing
# information, so every such placeholder is converted to NaN.
for missing_marker in ('-1', -1, -1.0):
    job_data.replace(missing_marker, np.nan, inplace=True)
job_data.tail(5)  # every -1 placeholder now shows up as NaN
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | Type of ownership | Industry | Sector | Revenue | Competitors | Easy Apply | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2248 | 2248 | RQS - IHHA - 201900004460 -1q Data Security An... | $78K-$104K (Glassdoor est.) | Maintains systems to protect data from unautho... | 2.5 | Avacend, Inc.\n2.5 | Denver, CO | Alpharetta, GA | 51 to 200 employees | NaN | Company - Private | Staffing & Outsourcing | Business Services | Unknown / Non-Applicable | NaN | NaN |
| 2249 | 2249 | Senior Data Analyst (Corporate Audit) | $78K-$104K (Glassdoor est.) | Position:\nSenior Data Analyst (Corporate Audi... | 2.9 | Arrow Electronics\n2.9 | Centennial, CO | Centennial, CO | 10000+ employees | 1935.0 | Company - Public | Wholesale | Business Services | $10+ billion (USD) | Avnet, Ingram Micro, Tech Data | NaN |
| 2250 | 2250 | Technical Business Analyst (SQL, Data analytic... | $78K-$104K (Glassdoor est.) | Title: Technical Business Analyst (SQL, Data a... | NaN | Spiceorb | Denver, CO | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2251 | 2251 | Data Analyst 3, Customer Experience | $78K-$104K (Glassdoor est.) | Summary\n\nResponsible for working cross-funct... | 3.1 | Contingent Network Services\n3.1 | Centennial, CO | West Chester, OH | 201 to 500 employees | 1984.0 | Company - Private | Enterprise Software & Network Solutions | Information Technology | $25 to $50 million (USD) | NaN | NaN |
| 2252 | 2252 | Senior Quality Data Analyst | $78K-$104K (Glassdoor est.) | You.\n\nYou bring your body, mind, heart and s... | 3.4 | SCL Health\n3.4 | Broomfield, CO | Broomfield, CO | 10000+ employees | 1864.0 | Nonprofit Organization | Health Care Services & Hospitals | Health Care | $2 to $5 billion (USD) | Centura Health, HealthONE, Denver Health and H... | NaN |
job_data.isnull().sum()
Unnamed: 0 0 Job Title 0 Salary Estimate 1 Job Description 0 Rating 272 Company Name 1 Location 0 Headquarters 172 Size 163 Founded 660 Type of ownership 163 Industry 353 Sector 353 Revenue 163 Competitors 1732 Easy Apply 2173 dtype: int64
The EDA will focus on:
Job Title
job_data['Job Title'].value_counts()
Data Analyst 405
Senior Data Analyst 90
Junior Data Analyst 30
Business Data Analyst 28
Sr. Data Analyst 21
...
Data Analyst – Level II 1
Data Analyst/Programmer 1
Data Services Business Analyst 2 1
Call Center Data Analyst 1
Senior Quality Data Analyst 1
Name: Job Title, Length: 1272, dtype: int64
job_data['Job Title'].value_counts()[:26]
Data Analyst 405 Senior Data Analyst 90 Junior Data Analyst 30 Business Data Analyst 28 Sr. Data Analyst 21 Data Analyst Junior 17 Data Analyst II 17 Data Quality Analyst 17 Data Governance Analyst 16 Lead Data Analyst 15 Data Reporting Analyst 13 Financial Data Analyst 12 Data Analyst I 11 Data Analyst III 11 Marketing Data Analyst 9 Sr Data Analyst 9 Data Management Analyst 8 Data Warehouse Analyst 8 Data Science Analyst 7 Technical Data Analyst 7 SQL Data Analyst 7 Research Data Analyst 6 Data Security Analyst 6 Healthcare Data Analyst 6 Clinical Data Analyst 6 Senior Healthcare Data Analyst 5 Name: Job Title, dtype: int64
# Merge spelling variants of the same position into one canonical title.
title_aliases = {
    'Sr. Data Analyst': 'Senior Data Analyst',
    'Sr Data Analyst': 'Senior Data Analyst',
    'Data Analyst Senior': 'Senior Data Analyst',
    'Jr. Data Analyst': 'Junior Data Analyst',
    'Jr Data Analyst': 'Junior Data Analyst',
    'Data Analyst Junior': 'Junior Data Analyst',
}
job_data['Job Title'] = job_data['Job Title'].replace(title_aliases)
job_data['Job Title'].value_counts()[:26]
Data Analyst 405 Senior Data Analyst 121 Junior Data Analyst 50 Business Data Analyst 28 Data Quality Analyst 17 Data Analyst II 17 Data Governance Analyst 16 Lead Data Analyst 15 Data Reporting Analyst 13 Financial Data Analyst 12 Data Analyst I 11 Data Analyst III 11 Marketing Data Analyst 9 Data Management Analyst 8 Data Warehouse Analyst 8 Data Science Analyst 7 Technical Data Analyst 7 SQL Data Analyst 7 Research Data Analyst 6 Healthcare Data Analyst 6 Data Security Analyst 6 Clinical Data Analyst 6 NY Healthcare Data/Reporting Analyst 5 Data Analyst III (Healthcare Analytics) 5 Product Data Analyst 5 Senior Healthcare Data Analyst 5 Name: Job Title, dtype: int64
job_data['Job Title'].isnull().sum()
0
Salary Estimate
job_data['Salary Estimate']
0 $37K-$66K (Glassdoor est.)
1 $37K-$66K (Glassdoor est.)
2 $37K-$66K (Glassdoor est.)
3 $37K-$66K (Glassdoor est.)
4 $37K-$66K (Glassdoor est.)
...
2248 $78K-$104K (Glassdoor est.)
2249 $78K-$104K (Glassdoor est.)
2250 $78K-$104K (Glassdoor est.)
2251 $78K-$104K (Glassdoor est.)
2252 $78K-$104K (Glassdoor est.)
Name: Salary Estimate, Length: 2253, dtype: object
job_data['Salary Estimate'].isnull().sum()
1
job_data[job_data['Salary Estimate'].isnull()]
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | Type of ownership | Industry | Sector | Revenue | Competitors | Easy Apply | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2149 | 2149 | Configuration/Data Management Analyst III | NaN | Job Description\nPosition Title: Configuration... | 4.4 | Protingent\n4.4 | Kent, WA | Bellevue, WA | 51 to 200 employees | 2001.0 | Company - Private | Staffing & Outsourcing | Business Services | $25 to $50 million (USD) | NaN | NaN |
# check whether the same company has any other ads
job_data[job_data['Company Name']=='Protingent\n4.4']
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | Type of ownership | Industry | Sector | Revenue | Competitors | Easy Apply | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2123 | 2123 | Engines Configuration and Data Management Analyst | $55K-$101K (Glassdoor est.) | Job Description\nPosition Title: Engines Confi... | 4.4 | Protingent\n4.4 | Kent, WA | Bellevue, WA | 51 to 200 employees | 2001.0 | Company - Private | Staffing & Outsourcing | Business Services | $25 to $50 million (USD) | NaN | NaN |
| 2149 | 2149 | Configuration/Data Management Analyst III | NaN | Job Description\nPosition Title: Configuration... | 4.4 | Protingent\n4.4 | Kent, WA | Bellevue, WA | 51 to 200 employees | 2001.0 | Company - Private | Staffing & Outsourcing | Business Services | $25 to $50 million (USD) | NaN | NaN |
job_data['Job Description'][2123]
'Job Description\nPosition Title: Engines Configuration and Data Management Analyst\n\nPosition Description: Protingent Staffing has an exciting contract opportunity with our client in Kent, WA.\n\nJob Qualifications:\nAA/AS or BA/BS in a related technical or liberal arts discipline\n3+ years of direct product configuration management experience (Product structure, parts, drawings, documents and requirements management)\n2+ years of experience working in a highly regulated environment (aerospace, nuclear, government, medical device, etc.)\nExperience working with software related to configuration management of complex product\nWorking knowledge of configuration management principles and practices\nFamiliar with product lifecycle management (PLM) and/or data management tools\nPossess high attention to detail and strong organizational/coordination skills\nProficient in Microsoft Office applications (Word, Excel, PowerPoint).\nExperience in aerospace industry\nProficient in technical writing of process and procedure\nExperience using Windchill, Creo, DOORS, Confluence, and Jira\nFamiliarity with Export Control regulations (EAR/ITAR)\nAbout Protingent: Protingent is a niche provider of top Engineering and IT talent to Software, Electronics, Medical Device, Telecom and Aerospace companies nationwide. Protingent exists to make a positive impact and contribution to the lives of others as well as our community by providing relevant, rewarding and exciting work opportunities for our candidates.\n\nBenefits Package: Protingent offers competitive salary, 100% paid health insurance, education/certification reimbursement, pre-tax commuter benefits, Paid Time Off (PTO) and an administered 401k plan.'
job_data['Job Description'][2149]
'Job Description\nPosition Title: Configuration/Data Management Analyst III\n\nPosition Description: Protingent has an opportunity for a Configuration Analyst in Kent, WA.\n\nJob Qualifications:\nAA/AS or BA/BS in a related technical or liberal arts discipline\n5-7 years of direct product configuration management experience (Product structure, parts, drawings and requirements management experience highly desired).\n3+ years of experience working in a highly regulated environment (security systems, medical device, aerospace, etc.).\nDeep experience with world-class toolsets for configuration management in complex products\nProven ability to communicate effectively both in writing and verbally with staff at all levels of the organization and external stakeholders, including regulatory agencies, customers and suppliers.\nExcellent technical writing and presentation skills.\nProficient with configuration management principles and practices\nProficient with product lifecycle management (PLM) and/or data management tools\nProficient in Microsoft Office applications (Word, Excel, PowerPoint)\nFamiliarity with export control (EAR/ITAR) regulations\nCMII certification/training\nExperience in aerospace or aviation\nExperience with Windchill, Creo, ERP/MES systems\nExperience with DOORS Next Generation\nExperience with Confluence & JIRA\nDatabase skills with SQL, queries, etc\nAbout Protingent: Protingent is a niche provider of top Engineering and IT talent to Software, Electronics, Medical Device, Telecom and Aerospace companies nationwide. Protingent exists to make a positive impact and contribution to the lives of others as well as our community by providing relevant, rewarding and exciting work opportunities for our candidates.\n\nBenefits Package: Protingent offers competitive salary, 100% paid health insurance, education/certification reimbursement, pre-tax commuter benefits, Paid Time Off (PTO) and an administered 401k plan.'
# The job descriptions of the two postings differ, so the salary of one cannot
# be reused for the other; drop the posting that has no salary estimate.
# Dropping by missing value instead of by a hard-coded row label keeps this
# step safe to re-run (a second drop of the same label raises KeyError) and
# independent of the row order of the CSV.
job_data.dropna(subset=['Salary Estimate'], inplace=True)
job_data['Salary Estimate'].isnull().sum()
0
To get numeric values from the salary estimate
job_data['Salary Estimate'].sample(3)
646 $42K-$66K (Glassdoor est.) 1739 $40K-$72K (Glassdoor est.) 1048 $43K-$94K (Glassdoor est.) Name: Salary Estimate, dtype: object
# Pull the lower and upper bound out of strings such as
# "$37K-$66K (Glassdoor est.)". A pattern is used instead of fixed character
# positions so that bounds of any width are read in full: slicing the first
# two characters turns a lower bound like "$113K" into 11.
salary_bounds = job_data['Salary Estimate'].str.extract(r'\$(\d+)K\s*-\s*\$(\d+)K')
# Values stay in thousands of dollars and keep the float32 dtype used before.
job_data['Minimum Salary'] = salary_bounds[0].astype('f')
job_data['Maximum Salary'] = salary_bounds[1].astype('f')
job_data[['Minimum Salary']]
| Minimum Salary | |
|---|---|
| 0 | 37.0 |
| 1 | 37.0 |
| 2 | 37.0 |
| 3 | 37.0 |
| 4 | 37.0 |
| ... | ... |
| 2248 | 78.0 |
| 2249 | 78.0 |
| 2250 | 78.0 |
| 2251 | 78.0 |
| 2252 | 78.0 |
2252 rows × 1 columns
job_data[['Maximum Salary']]
| Maximum Salary | |
|---|---|
| 0 | 66.0 |
| 1 | 66.0 |
| 2 | 66.0 |
| 3 | 66.0 |
| 4 | 66.0 |
| ... | ... |
| 2248 | 104.0 |
| 2249 | 104.0 |
| 2250 | 104.0 |
| 2251 | 104.0 |
| 2252 | 104.0 |
2252 rows × 1 columns
# Midpoint of the parsed salary range, in the same units as the
# 'Minimum Salary' and 'Maximum Salary' columns it is computed from.
job_data['Average Salary'] = (job_data['Minimum Salary']+job_data['Maximum Salary'])/2
job_data['Average Salary']
0 51.5
1 51.5
2 51.5
3 51.5
4 51.5
...
2248 91.0
2249 91.0
2250 91.0
2251 91.0
2252 91.0
Name: Average Salary, Length: 2252, dtype: float32
job_data[['Salary Estimate','Minimum Salary','Maximum Salary','Average Salary']]
| Salary Estimate | Minimum Salary | Maximum Salary | Average Salary | |
|---|---|---|---|---|
| 0 | $37K-$66K (Glassdoor est.) | 37.0 | 66.0 | 51.5 |
| 1 | $37K-$66K (Glassdoor est.) | 37.0 | 66.0 | 51.5 |
| 2 | $37K-$66K (Glassdoor est.) | 37.0 | 66.0 | 51.5 |
| 3 | $37K-$66K (Glassdoor est.) | 37.0 | 66.0 | 51.5 |
| 4 | $37K-$66K (Glassdoor est.) | 37.0 | 66.0 | 51.5 |
| ... | ... | ... | ... | ... |
| 2248 | $78K-$104K (Glassdoor est.) | 78.0 | 104.0 | 91.0 |
| 2249 | $78K-$104K (Glassdoor est.) | 78.0 | 104.0 | 91.0 |
| 2250 | $78K-$104K (Glassdoor est.) | 78.0 | 104.0 | 91.0 |
| 2251 | $78K-$104K (Glassdoor est.) | 78.0 | 104.0 | 91.0 |
| 2252 | $78K-$104K (Glassdoor est.) | 78.0 | 104.0 | 91.0 |
2252 rows × 4 columns
Job Description
job_data['Job Description'][0]
"Are you eager to roll up your sleeves and harness data to drive policy change? Do you enjoy sifting through complex datasets to illuminate trends and insights? Do you see yourself working for a values-driven organization with a vision to tackle the most pressing injustices of our day?\n\nWe are looking to hire a bright, hard-working, and creative individual with strong data management skills and a demonstrated commitment to immigrant's rights. The Data Analyst will assist with analysis and reporting needs for Veras Center on Immigration and Justice (CIJ), working across its current projects and future Vera initiatives.\n\nWho we are:\n\nFounded in 1961, The Vera Institute is an independent, non-partisan, nonprofit organization that combines expertise in research, technical assistance, and demonstration projects to assist leaders in government and civil society examine justice policy and practice, and improve the systems people rely on for justice and safety.\nWe study problems that impede human dignity and justice.\nWe pilot solutions that are at once transformative and achievable.\nWe engage diverse communities in informed debate.\nAnd we harness the power of evidence to drive effective policy and practice\nWhat were doing:\n\nWe are helping to build a movementamong government leaders, advocates, and the immigration legal services communitytowards universal legal representation for immigrants facing deportation. In the face of stepped-up immigration enforcement, millions of non-citizens are at risk of extended detention and permanent separation from their families and communities. Veras Center on Immigration and Justice (CIJ) partners with government, non-profit partners, and communities to improve government systems that affect immigrants and their families. 
CIJ administers several nationwide legal services programs for immigrants facing deportation, develops and implements pilot programs, provides technical assistance, and conducts independent research and evaluation.\n\nThats where you come in:\nThe Data Analyst will support the Centers programmatic efforts through regular monitoring and reporting of federal government and subcontractor data. CIJ manages several proprietary databases that run on AWS and Caspio and uses SQL, R, and Python to manage data. This is an opportunity to help shape an innovative national research and policy agenda as part of a dedicated team of experts working to improve access to justice for non-citizens.\n\nVera seeks to hire a Data Analyst to work on various data management projects with its Center on Immigration and Justice (CIJ). In collaboration with other Data Analysts, this position will involve work across several projects, such as the Unaccompanied Childrens Program (UCP), a program to increase legal representation for immigrant children facing deportation without a parent or legal guardian. The position may cover additional duties for the Legal Orientation Program for Custodians (LOPC), which educates the custodians of unaccompanied children about their rights and the immigration court process.\n\nAbout the role:\n\nAs a Data Analyst, you will report to a member of the research team and work in close collaboration with other Vera staff on ongoing database management, monitoring, reporting, and analysis projects. Youll support the team by taking ownership of ongoing monitoring and reporting tasks involving large data sets. 
Other principal responsibilities will include:\nSupporting research staff by preparing large datasets for analysis, including merging, cleaning, and recoding data;\nProviding insights into program performance through summary statistics and performance indicators;\nProducing timely reports on Vera projects for team members and stakeholders;\nImproving recurring reporting processes by optimizing code and producing subsequent documentation;\nCoordinating database management tasks such as participating in new database design, modifying existing databases, and communicating with outside engineers and subcontractors;\nDeveloping codebooks and delivering user trainings through webinars and database guides;\nBuilding and maintaining interactive dashboards;\nDocumenting and correcting data quality issues;\nWorking with supervisors to prioritize program needs;\nAssisting on other projects and tasks as assigned.\nAbout you:\n\nYoure committed to improving issues affecting immigrants in the United States. Applicants with personal experiences with the immigration system are especially encouraged to apply.\n\nYoure just getting started in your career and have 1 2 years of professional or internship experience working with large datasets and preparing data for analysis.\n\nYou have a real enthusiasm for working with data.\n\nYou are comfortable writing queries in SQL, R, and/or Python, or have a solid foundation coding in other programming languages used to manipulate data. Experience working collaboratively using tools like Git/GitHub is a plus.\n\nYou have exceptional attention to detail, strong problem-solving ability and logical reasoning skills, and the ability to detect anomalies in data.\n\nYoure able to work on multiple projects effectively and efficiently, both independently and collaboratively with a team.\n\nThis position involves working with secure data that may require government security clearance. That clearance is restricted to U.S. 
citizens and citizens of countries that are party to collective defense agreements with the U.S. The list of those countries is detailed on this webpage. An additional requirement of that clearance is residence in the United States for at least three of the last five years.\n\nHow to apply:\n\nPlease submit cover letter and resume. Applications will be considered on a rolling basis until position is filled. Online submission in PDF format is preferred. Applications with no cover letter attached will not be considered. The cover letter should address your interest in CIJ and this position.\n\nHowever, if necessary, materials may be mailed or faxed to\n\nATTN: Human Resources / CIJ Data Analyst Recruitment\n\nVera Institute of Justice\n\n34 35th St, Suite 4-2A\n\nBrooklyn, NY 11232\n\nFax: (212) 941-9407\n\nPlease use only one method (online, mail or fax) of submission.\n\nNo phone calls, please. Only applicants selected for interviews will be contacted.\n\nVera is an equal opportunity/affirmative action employer. All qualified applicants will be considered for employment without unlawful discrimination based on race, color, creed, national origin, sex, age, disability, marital status, sexual orientation, military status, prior record of arrest or conviction, citizenship status, current employment status, or caregiver status.\n\nVera works to advance justice, particularly racial justice, in an increasingly multicultural country and globally connected world. We value diverse experiences, including with regard to educational background and justice system contact, and depend on a diverse staff to carry out our mission.\n\nFor more information about Vera and CIJs work, please visit www.vera.org.\n\nPowered by JazzHR"
job_data['Job Description'].isnull().sum()
0
job_data['Python Jobs'] = job_data['Job Description'].str.contains('Python')
job_data[['Python Jobs']].value_counts()
Python Jobs False 1632 True 620 dtype: int64
job_data['SQL Jobs'] = job_data['Job Description'].str.contains('SQL')
job_data['SQL Jobs'].value_counts()
True 1378 False 874 Name: SQL Jobs, dtype: int64
# Match "Excel" as a whole word only: a plain substring search also hits
# words such as "Excellent" and overstates how many postings ask for Excel.
job_data['Excel Jobs'] = job_data['Job Description'].str.contains(r'\bExcel\b')
job_data['Excel Jobs'].value_counts()
True 1208 False 1044 Name: Excel Jobs, dtype: int64
job_data['Tableau Jobs'] = job_data['Job Description'].str.contains('Tableau')
job_data['Tableau Jobs'].value_counts()
False 1636 True 616 Name: Tableau Jobs, dtype: int64
job_data[['Python Jobs','SQL Jobs','Excel Jobs','Tableau Jobs']].sum()
Python Jobs 620 SQL Jobs 1378 Excel Jobs 1208 Tableau Jobs 616 dtype: int64
Job Rating
job_data['Rating'].sample(5)
545 3.1 1425 3.5 48 4.9 490 1.0 39 4.6 Name: Rating, dtype: float64
job_data['Rating'].isnull().sum()
272
job_data[job_data['Rating'].isnull()]
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | ... | Revenue | Competitors | Easy Apply | Minimum Salary | Maximum Salary | Average Salary | Python Jobs | SQL Jobs | Excel Jobs | Tableau Jobs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | 11 | Data Analyst | $37K-$66K (Glassdoor est.) | BulbHead is currently seeking a Data Analyst t... | NaN | BulbHead | Fairfield, NJ | NaN | 1 to 50 employees | NaN | ... | Unknown / Non-Applicable | NaN | NaN | 37.0 | 66.0 | 51.5 | True | True | True | True |
| 21 | 21 | Data Science Analyst | $37K-$66K (Glassdoor est.) | Job Description\nOur client, a music streaming... | NaN | MUSIC & Entertainment | New York, NY | Marina del Rey, CA | Unknown | NaN | ... | Unknown / Non-Applicable | NaN | NaN | 37.0 | 66.0 | 51.5 | True | True | False | True |
| 34 | 34 | Data Analyst (Games) | $46K-$87K (Glassdoor est.) | Carry1st is the leading mobile game publisher ... | NaN | Carry1st | New York, NY | NaN | NaN | NaN | ... | NaN | NaN | NaN | 46.0 | 87.0 | 66.5 | True | True | False | True |
| 36 | 36 | Data Business Analyst | $46K-$87K (Glassdoor est.) | At Clear Street, we are disrupting the institu... | NaN | Clear Street | New York, NY | New York, NY | 51 to 200 employees | 2018.0 | ... | $1 to $5 million (USD) | NaN | NaN | 46.0 | 87.0 | 66.5 | True | True | True | False |
| 40 | 40 | Business Analyst, Data Platforms | $46K-$87K (Glassdoor est.) | Company Description\n\nPinto is building the w... | NaN | Pinto | New York, NY | New York, NY | 1 to 50 employees | NaN | ... | Unknown / Non-Applicable | NaN | NaN | 46.0 | 87.0 | 66.5 | False | True | True | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2200 | 2200 | Data Analyst | $49K-$91K (Glassdoor est.) | Role Data Analyst Duration12+ months Location ... | NaN | TechAspect Solutions, Inc. dba TA Digital | Centennial, CO | NaN | NaN | NaN | ... | NaN | NaN | NaN | 49.0 | 91.0 | 70.0 | False | True | True | False |
| 2202 | 2202 | Financial Data Analyst | $49K-$91K (Glassdoor est.) | Position:Financial Data AnalystJob Description... | NaN | Black Knight Financial Technology Solutions | Denver, CO | NaN | NaN | NaN | ... | NaN | NaN | NaN | 49.0 | 91.0 | 70.0 | False | True | True | False |
| 2239 | 2239 | Senior Contract Data Analyst | $78K-$104K (Glassdoor est.) | OverviewAmyx is seeking to hire a Senior Contr... | NaN | Amyx, Iinc. | Aurora, CO | NaN | NaN | NaN | ... | NaN | NaN | NaN | 78.0 | 104.0 | 91.0 | False | False | True | False |
| 2246 | 2246 | Technical Business Analyst (SQL, Data analytic... | $78K-$104K (Glassdoor est.) | Spiceorb is looking for Technical Business Ana... | NaN | Spiceorb | Denver, CO | NaN | NaN | NaN | ... | NaN | NaN | NaN | 78.0 | 104.0 | 91.0 | False | True | True | False |
| 2250 | 2250 | Technical Business Analyst (SQL, Data analytic... | $78K-$104K (Glassdoor est.) | Title: Technical Business Analyst (SQL, Data a... | NaN | Spiceorb | Denver, CO | NaN | NaN | NaN | ... | NaN | NaN | NaN | 78.0 | 104.0 | 91.0 | False | True | True | False |
272 rows × 23 columns
company name
# Peek at ten random company names to see how the raw values are formatted.
job_data.loc[:, ['Company Name']].sample(n=10)
| Company Name | |
|---|---|
| 619 | IBM\n3.7 |
| 1555 | Zoom\n4.8 |
| 1822 | Diverse Lynx\n3.9 |
| 1011 | MUFG\n3.1 |
| 1367 | Goldman Sachs\n4.0 |
| 947 | Crown Castle USA Inc\n3.6 |
| 890 | Mondo\n3.9 |
| 1835 | Truist\n3.5 |
| 1956 | QuinStreet\n3.2 |
| 2231 | GHX\n2.8 |
# Show the company name next to the Rating column for ten random rows.
job_data.loc[:, ['Company Name', 'Rating']].sample(n=10)
| Company Name | Rating | |
|---|---|---|
| 852 | Jobot\n5.0 | 5.0 |
| 1707 | Texas Water Development Board\n2.8 | 2.8 |
| 1006 | Tech Finders\n3.4 | 3.4 |
| 291 | Equation Staffing | NaN |
| 121 | New York Life Insurance Co\n3.5 | 3.5 |
| 443 | ClientSolv Technologies\n3.3 | 3.3 |
| 295 | Eagle Investment Systems\n3.7 | 3.7 |
| 513 | Stamps.com\n3.1 | 3.1 |
| 2084 | Swoon\n4.6 | 4.6 |
| 708 | Turner Broadcasting\n3.7 | 3.7 |
# The raw company name has the rating appended after a newline
# (e.g. "IBM\n3.7"), so keep only the text before the first newline.
job_data['Company Name'] = (
    job_data['Company Name']
    .str.split('\n', n=1)
    .str.get(0)
)
job_data.loc[:, ['Company Name']].sample(n=10)
| Company Name | |
|---|---|
| 1736 | ARC Group |
| 1911 | Snowflake Computing |
| 1016 | Isaac Elementary School District #5 |
| 222 | Attentive |
| 587 | UC Davis Medical Center |
| 1563 | Introlligent Inc. |
| 1843 | Staffigo Technical Services, LLC |
| 1986 | Hinge Health |
| 985 | FAMILY INVOLVEMENT CENTER |
| 2111 | Resource Logistics, Inc. |
industry
# Ten random values from the Industry column.
job_data.loc[:, 'Industry'].sample(n=10)
1467 Computer Hardware & Software 1018 Enterprise Software & Network Solutions 1600 Staffing & Outsourcing 1546 Colleges & Universities 2249 Wholesale 961 Staffing & Outsourcing 2042 IT Services 470 Consulting 17 Consulting 1370 Staffing & Outsourcing Name: Industry, dtype: object
# Number of rows with a missing Industry value.
job_data['Industry'].isna().sum()
353
# How many postings fall into each industry (missing values are excluded).
job_data.loc[:, 'Industry'].value_counts()
IT Services 325
Staffing & Outsourcing 322
Health Care Services & Hospitals 151
Computer Hardware & Software 111
Consulting 111
...
Truck Rental & Leasing 1
News Outlet 1
Catering & Food Service Contractors 1
Chemical Manufacturing 1
Hotels, Motels, & Resorts 1
Name: Industry, Length: 88, dtype: int64
sector
# Ten random values from the Sector column.
job_data.loc[:, 'Sector'].sample(n=10)
485 Aerospace & Defense 1337 Finance 1877 Finance 1125 Health Care 1309 Business Services 1501 Business Services 1616 Business Services 700 Insurance 1438 NaN 43 Business Services Name: Sector, dtype: object
# How many postings fall into each sector (missing values are excluded).
job_data.loc[:, 'Sector'].value_counts()
Information Technology 570 Business Services 523 Finance 169 Health Care 151 Education 52 Insurance 51 Accounting & Legal 43 Media 42 Manufacturing 40 Retail 38 Government 36 Biotech & Pharmaceuticals 33 Non-Profit 26 Aerospace & Defense 22 Transportation & Logistics 20 Construction, Repair & Maintenance 16 Consumer Services 14 Oil, Gas, Energy & Utilities 13 Real Estate 12 Telecommunications 11 Restaurants, Bars & Food Services 8 Arts, Entertainment & Recreation 7 Mining & Metals 1 Travel & Tourism 1 Name: Sector, dtype: int64
Analysis part
# Show the first row to recall which columns are available.
job_data.iloc[:1]
| Unnamed: 0 | Job Title | Salary Estimate | Job Description | Rating | Company Name | Location | Headquarters | Size | Founded | ... | Revenue | Competitors | Easy Apply | Minimum Salary | Maximum Salary | Average Salary | Python Jobs | SQL Jobs | Excel Jobs | Tableau Jobs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Data Analyst, Center on Immigration and Justic... | $37K-$66K (Glassdoor est.) | Are you eager to roll up your sleeves and harn... | 3.2 | Vera Institute of Justice | New York, NY | New York, NY | 201 to 500 employees | 1961.0 | ... | $100 to $500 million (USD) | NaN | True | 37.0 | 66.0 | 51.5 | True | True | False | False |
1 rows × 23 columns
# Keep only the columns used in the analysis below.  The explicit .copy()
# makes job_required_data an independent frame, so assigning into it later
# neither raises SettingWithCopyWarning nor depends on job_data.
job_required_data = job_data[
    [
        'Company Name',
        'Job Title',
        'Rating',
        'Sector',
        'Industry',
        'Minimum Salary',
        'Maximum Salary',
        'Average Salary',
        'Python Jobs',
        'SQL Jobs',
        'Excel Jobs',
        'Tableau Jobs',
    ]
].copy()
job_required_data.head(10)
| Company Name | Job Title | Rating | Sector | Industry | Minimum Salary | Maximum Salary | Average Salary | Python Jobs | SQL Jobs | Excel Jobs | Tableau Jobs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Vera Institute of Justice | Data Analyst, Center on Immigration and Justic... | 3.2 | Non-Profit | Social Assistance | 37.0 | 66.0 | 51.5 | True | True | False | False |
| 1 | Visiting Nurse Service of New York | Quality Data Analyst | 3.8 | Health Care | Health Care Services & Hospitals | 37.0 | 66.0 | 51.5 | False | True | True | False |
| 2 | Squarespace | Senior Data Analyst, Insights & Analytics Team... | 3.4 | Information Technology | Internet | 37.0 | 66.0 | 51.5 | True | True | False | True |
| 3 | Celerity | Data Analyst | 4.1 | Information Technology | IT Services | 37.0 | 66.0 | 51.5 | False | True | False | True |
| 4 | FanDuel | Reporting Data Analyst | 3.9 | Arts, Entertainment & Recreation | Sports & Recreation | 37.0 | 66.0 | 51.5 | True | True | True | False |
| 5 | Point72 | Data Analyst | 3.9 | Finance | Investment Banking & Asset Management | 37.0 | 66.0 | 51.5 | True | True | True | False |
| 6 | Two Sigma | Business/Data Analyst (FP&A) | 4.4 | Finance | Investment Banking & Asset Management | 37.0 | 66.0 | 51.5 | True | True | True | False |
| 7 | GNY Insurance Companies | Data Science Analyst | 3.7 | Insurance | Insurance Carriers | 37.0 | 66.0 | 51.5 | True | True | False | False |
| 8 | DMGT | Data Analyst | 4.0 | Finance | Venture Capital & Private Equity | 37.0 | 66.0 | 51.5 | True | True | True | False |
| 9 | Riskified | Data Analyst, Merchant Health | 4.4 | Business Services | Research & Development | 37.0 | 66.0 | 51.5 | True | True | False | False |
# Missing-value count for every column of the analysis frame.
job_required_data.isna().sum()
Company Name 1 Job Title 0 Rating 272 Sector 353 Industry 353 Minimum Salary 0 Maximum Salary 0 Average Salary 0 Python Jobs 0 SQL Jobs 0 Excel Jobs 0 Tableau Jobs 0 dtype: int64
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
job_required_data.describe()
| Rating | Minimum Salary | Maximum Salary | Average Salary | |
|---|---|---|---|---|
| count | 1980.000000 | 2252.000000 | 2252.000000 | 2252.000000 |
| mean | 3.731566 | 51.678509 | 89.979126 | 70.828819 |
| std | 0.670333 | 18.446358 | 29.315569 | 21.327679 |
| min | 1.000000 | 11.000000 | 38.000000 | 33.500000 |
| 25% | 3.300000 | 41.000000 | 70.000000 | 58.000000 |
| 50% | 3.700000 | 49.000000 | 87.000000 | 69.000000 |
| 75% | 4.100000 | 60.000000 | 104.000000 | 80.000000 |
| max | 5.000000 | 99.000000 | 190.000000 | 138.500000 |
import plotly.express as px
# One histogram (with a box-plot marginal) per salary column across all
# postings.  Every chart reads from job_required_data so the three plots
# share the same source frame.
for salary_column, chart_title in (
    ('Minimum Salary', 'Minimum Salary of Data Analyst jobs'),
    ('Maximum Salary', 'Maximum Salary of Data Analyst jobs'),
    ('Average Salary', 'Average Salary of Data Analyst Jobs'),
):
    fig = px.histogram(
        job_required_data,
        x=salary_column,
        title=chart_title,
        marginal='box',
    )
    fig.show()
So far we have analysed the whole Data Analyst job family.
Data Analyst
# Keep only the postings whose title is exactly "Data Analyst".
data_analyst = job_required_data.loc[
    job_required_data['Job Title'].eq('Data Analyst')
]
data_analyst.head(n=5)
| Company Name | Job Title | Rating | Sector | Industry | Minimum Salary | Maximum Salary | Average Salary | Python Jobs | SQL Jobs | Excel Jobs | Tableau Jobs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | Celerity | Data Analyst | 4.1 | Information Technology | IT Services | 37.0 | 66.0 | 51.5 | False | True | False | True |
| 5 | Point72 | Data Analyst | 3.9 | Finance | Investment Banking & Asset Management | 37.0 | 66.0 | 51.5 | True | True | True | False |
| 8 | DMGT | Data Analyst | 4.0 | Finance | Venture Capital & Private Equity | 37.0 | 66.0 | 51.5 | True | True | True | False |
| 10 | NYU Langone Health | Data Analyst | 4.0 | Health Care | Health Care Services & Hospitals | 37.0 | 66.0 | 51.5 | False | False | True | False |
| 11 | BulbHead | Data Analyst | NaN | NaN | NaN | 37.0 | 66.0 | 51.5 | True | True | True | True |
# Summary statistics of the numeric columns for the exact "Data Analyst" postings.
data_analyst.describe()
| Rating | Minimum Salary | Maximum Salary | Average Salary | |
|---|---|---|---|---|
| count | 349.000000 | 405.000000 | 405.000000 | 405.000000 |
| mean | 3.850143 | 50.629631 | 90.874077 | 70.751854 |
| std | 0.679529 | 19.638460 | 35.892197 | 24.237709 |
| min | 1.000000 | 11.000000 | 38.000000 | 33.500000 |
| 25% | 3.400000 | 37.000000 | 67.000000 | 53.500000 |
| 50% | 3.900000 | 50.000000 | 85.000000 | 68.000000 |
| 75% | 4.300000 | 59.000000 | 103.000000 | 79.500000 |
| max | 5.000000 | 99.000000 | 190.000000 | 138.500000 |
# Minimum / maximum / average salary distributions for the exact
# "Data Analyst" postings, each drawn with a box-plot marginal.
for salary_column, chart_title in (
    ('Minimum Salary', 'Minimum Salary of Data Analyst'),
    ('Maximum Salary', 'Maximum Salary of Data Analyst'),
    ('Average Salary', 'Average Salary of Data Analyst'),
):
    fig = px.histogram(
        data_analyst,
        x=salary_column,
        title=chart_title,
        marginal='box',
    )
    fig.show()
# Keep only the postings whose title is exactly "Junior Data Analyst".
junior_data = job_required_data.loc[
    job_required_data['Job Title'].eq('Junior Data Analyst')
]
junior_data.head(n=5)
| Company Name | Job Title | Rating | Sector | Industry | Minimum Salary | Maximum Salary | Average Salary | Python Jobs | SQL Jobs | Excel Jobs | Tableau Jobs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 64 | Kforce | Junior Data Analyst | 4.1 | Business Services | Staffing & Outsourcing | 51.0 | 88.0 | 69.5 | True | True | True | True |
| 124 | Staffigo Technical Services, LLC | Junior Data Analyst | 5.0 | Information Technology | IT Services | 59.0 | 85.0 | 72.0 | False | True | True | False |
| 346 | Staffigo Technical Services, LLC | Junior Data Analyst | 5.0 | Information Technology | IT Services | 77.0 | 132.0 | 104.5 | False | True | True | False |
| 428 | Abnormal Security | Junior Data Analyst | 4.1 | Information Technology | Enterprise Software & Network Solutions | 26.0 | 47.0 | 36.5 | False | False | False | False |
| 460 | Patient Advocate Foundation | Junior Data Analyst | 2.7 | Non-Profit | Social Assistance | 43.0 | 69.0 | 56.0 | False | True | True | True |
# Minimum / maximum / average salary distributions for the
# "Junior Data Analyst" postings, each drawn with a box-plot marginal.
for salary_column, chart_title in (
    ('Minimum Salary', 'Minimum Salary of Jr Data Analyst'),
    ('Maximum Salary', 'Maximum Salary of Jr Data Analyst'),
    ('Average Salary', 'Average Salary of Jr Data Analyst'),
):
    fig = px.histogram(
        junior_data,
        x=salary_column,
        title=chart_title,
        marginal='box',
    )
    fig.show()
Job openings by sector
# Pull out the Sector column and count the postings per sector.
top_sector = job_required_data.loc[:, 'Sector']
top_sector.value_counts(dropna=True)
Information Technology 570 Business Services 523 Finance 169 Health Care 151 Education 52 Insurance 51 Accounting & Legal 43 Media 42 Manufacturing 40 Retail 38 Government 36 Biotech & Pharmaceuticals 33 Non-Profit 26 Aerospace & Defense 22 Transportation & Logistics 20 Construction, Repair & Maintenance 16 Consumer Services 14 Oil, Gas, Energy & Utilities 13 Real Estate 12 Telecommunications 11 Restaurants, Bars & Food Services 8 Arts, Entertainment & Recreation 7 Mining & Metals 1 Travel & Tourism 1 Name: Sector, dtype: int64
# Count of postings per sector.  The chart gets a title like the other
# figures in this notebook, and the bars are ordered by count (largest
# first) so the ranking of sectors can be read directly off the x-axis.
fig = px.histogram(top_sector, x='Sector', title='Job openings by Sector')
fig.update_xaxes(categoryorder='total descending')
fig.show()